package com.informa.utils;

import static com.informa.utils.IOUtils.inputStreamForText;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;

import org.w3c.tidy.Tidy;

/**
 * Static helpers for working with HTML markup.
 */
public class HtmlUtils {

	/**
	 * Converts HTML text to XML by passing it through {@link Tidy} with XML
	 * output enabled.
	 *
	 * <p>The text is handed to Tidy as the stream produced by
	 * {@code inputStreamForText(html)}; the bytes Tidy writes are then decoded
	 * as UTF-8 and returned.</p>
	 *
	 * <p>NOTE(review): neither the encoding used by {@code inputStreamForText}
	 * nor Tidy's output encoding is set or visible in this class; confirm both
	 * agree with the UTF-8 decoding performed here.</p>
	 *
	 * @param html the HTML text to convert
	 * @return the output written by Tidy, decoded as UTF-8
	 * @throws RuntimeException wrapping an {@link UnsupportedEncodingException}
	 *         if the UTF-8 charset name is not supported
	 */
	public static String asXml(String html) {
		Tidy tidy = newQuietXmlTidy();
		ByteArrayOutputStream xmlBytes = new ByteArrayOutputStream();

		tidy.parse(inputStreamForText(html), xmlBytes);

		try {
			return xmlBytes.toString("UTF-8");
		} catch (UnsupportedEncodingException unsupported) {
			throw new RuntimeException(unsupported);
		}
	}

	/**
	 * Builds a Tidy instance set up for XML output with quiet mode on,
	 * warnings off and the shown-error count set to zero.
	 */
	private static Tidy newQuietXmlTidy() {
		Tidy tidy = new Tidy();
		tidy.setXmlOut(true);
		tidy.setQuiet(true);
		tidy.setShowWarnings(false);
		tidy.setShowErrors(0);
		return tidy;
	}

}
